import holoviews as hv
hv.extension('bokeh')
%opts Graph [width=600 height=400]
Thurston Sexton + Mike Brundage
import warnings
warnings.simplefilter(action='ignore')
# Build `sample_tag`: the tag rows for H-/I-series machines that carry the
# 'broken' tag, plus the time (in days) since the previous such record for
# the same machine.
# samp = ['broken', 'motor' ]
# Machine IDs consisting of an 'H' or 'I' followed only by digits; rows where
# the match result is missing are treated as non-matching.
h_or_i = df.MACH.str.match(r'^[HI][0-9]*$').fillna(False)
# Rows whose 'broken' entry under tag_df.P is positive.
is_broke = (tag_df.P['broken']>0)
# df.MACH[h_or_i]
cond = h_or_i & is_broke
idx_col = pd.DatetimeIndex(df['DATE RECEIVED'])
# cond = (tag_df.P['broken']>0)#|(tag_df.S['replace']>0)
# Keep the selected rows, and only the columns whose total over those rows
# exceeds 1.
sample_tag = tag_df.loc[cond,tag_df.loc[cond].sum()>1]
# Drop the outer column level so columns are addressed by the inner label
# alone (presumably the tag name -- TODO confirm tag_df's column layout).
sample_tag.columns = sample_tag.columns.droplevel(0)
sample_tag = pd.concat([sample_tag, df.MACH[cond]], axis=1)
sample_tag['date'] = idx_col[cond]
# First character of the machine ID ('H' or 'I', given the regex above).
sample_tag.loc[:,'mach_type'] = sample_tag.MACH.str[0]#.astype('category')
# Difference between consecutive dates within each machine; the sorted result
# is aligned back onto sample_tag by index on assignment.
sample_tag['tbf'] = sample_tag.sort_values(['MACH','date']).groupby('MACH')['date'].diff()
# Convert the timedelta to a float number of days.
sample_tag.loc[:,'tbf'] = sample_tag.tbf/pd.Timedelta(days=1)#.dt.total_seconds() / (24 * 60 * 60)
# (sample_tag.groupby('MACH')['tbf'].sum()/sample_tag.groupby('MACH')['tbf'].count()).sort_values()
# NOTE(review): bare expression; its value is only shown if it is the last
# statement of a notebook cell -- confirm the cell boundary.
sample_tag.mach_type.value_counts()
# sns.boxplot(y='tbf', x='MACH', data=sample_tag.dropna().reset_index())
# sample_tag.dropna(subset=['tbf']).plot(y='tbf', x='MACH', kind='box')
# Box plot of time between 'broken' events for the 20 machines with the most
# observations. Only one figure is created here: an extra
# `plt.figure(figsize=(5,10))` call would open a second figure that nothing
# draws on and that the notebook renders as an empty "0 Axes" output.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Rows that have a defined time-between-failure value.
samps = sample_tag[['mach_type', 'tbf', 'MACH']].dropna()
# Machines ordered by how many samples they contribute (most first).
# Alternative ordering by mean tbf:
# order = samps.groupby('MACH').mean().sort_values('tbf').index
order = samps.MACH.value_counts().index

import matplotlib.gridspec as gridspec
fig = plt.figure(tight_layout=True, figsize=(12, 8))
gs = gridspec.GridSpec(2, 2)

# The box plot spans both rows of the left-hand column.
ax1 = fig.add_subplot(gs[:, 0])
sns.boxplot(
    data=samps,
    y='MACH',
    x='tbf',
    hue='mach_type',
    orient='h',
    order=order[:20],
    notch=False,
    ax=ax1,
)
# Label the axes object directly rather than relying on pyplot's notion of
# the "current" axes.
ax1.set_xlabel('days')
ax1.set_title('Time Between Failure ("broken")')
ax1.set(xlim=(0, 250));
from lifelines import WeibullFitter, ExponentialFitter, KaplanMeierFitter
def mask_to_ETraw(df_clean, mask, fill_null=1.):
    """Return durations and event flags for the rows selected by ``mask``.

    Parameters
    ----------
    df_clean : DataFrame with 'DATE RECEIVED' and 'MACH' columns.
    mask : boolean row selector, applied both to ``df_clean`` and to the
        module-level ``tag_df`` (presumably they share an index -- TODO
        confirm).
    fill_null : unused; kept so existing callers keep working.

    Returns
    -------
    (T, E) : ``T`` is the number of days since the previous selected record
        of the same machine, restricted to defined, strictly positive values.
        ``E`` is 1 where the record has no 'replaced' tag and 0 (censored)
        where it does, restricted to the same rows.
    """
    one_day = pd.Timedelta(days=1)

    selected = df_clean.loc[mask]
    per_machine = selected.sort_values('DATE RECEIVED').groupby('MACH')
    gaps = per_machine['DATE RECEIVED'].transform(pd.Series.diff)
    T = gaps / one_day

    # assume censored when parts replaced (changeout)
    was_replaced = tag_df.S['replaced'] > 0
    E = (~was_replaced).astype(int)[mask]

    # Drop each machine's first record (no predecessor) and zero-length gaps.
    keep = T.notna() & (T > 0.)
    return T[keep], E[keep]
# Right-hand column: pooled Kaplan-Meier curve on top, per-type curves below.
# The bottom axes is created first so that the top one can share its x-axis.
ax3 = fig.add_subplot(gs[-1,-1])
ax2 = fig.add_subplot(gs[0,-1], sharex=ax3)

# All selected machines pooled (mask `cond`).
T, E = mask_to_ETraw(df, cond)
kmf = KaplanMeierFitter()
kmf.fit(T, event_observed=E, label='Machine K-M')
kmf.plot(show_censors=True, censor_styles={'marker':'|'}, ax=ax2, color='xkcd:gray')
ax2.set(xlim=(0,250), ylabel=r'$S(t)$', title='Kaplan-Meier Survival Function');

# One curve per machine type, both drawn on the lower axes (I first, then H).
i_ = df.MACH.str.match(r'^[I][0-9]*$').fillna(False)
h_ = df.MACH.str.match(r'^[H][0-9]*$').fillna(False)
for type_mask, type_label in ((i_, 'I-type K-M'), (h_, 'H-type K-M')):
    T, E = mask_to_ETraw(df, type_mask&is_broke)
    kmf.fit(T, event_observed=E, label=type_label)
    kmf.plot(show_censors=True, censor_styles={'marker':'|'}, ax=ax3)
ax3.set(xlim=(0,250), ylabel=r'$S(t)$', xlabel='days');
<Figure size 360x720 with 0 Axes>
Markers ( | ) indicate a censored observation: a maintenance event that carries the 'replaced' tag (a part change-out), which the analysis treats as censoring rather than as an observed failure.
# top3 = sample_tag.MACH.isin(['H34', 'I19', 'H14'])
# sample_tag[top3, sample_tag.loc[top3,:].sum()>3].groupby('MACH').sum().plot(kind='bar')
# tag_df[sample_tag.MACH=='H34'].sum()
def machine_tags(name, n_reps):
    """Return the tag columns that occur often for one machine.

    Parameters
    ----------
    name : pattern looked up in ``df['MACH']`` with ``str.contains``
        (case-insensitive; pandas treats it as a regular expression by
        default).
    n_reps : a column is kept only if its total over the machine's rows is
        strictly greater than this value.

    Returns
    -------
    The rows of the module-level ``tag_df`` for the matching machine(s),
    restricted to the columns that pass the threshold.
    """
    # na=False marks rows with a missing machine ID as non-matching and
    # yields a proper boolean mask without a separate fillna step.
    is_match = df['MACH'].str.contains(name, case=False, na=False)
    # Select the rows once and reuse them for both the totals and the result.
    rows = tag_df.loc[is_match]
    frequent = (rows.sum() > n_reps).values
    return rows.loc[:, frequent]
# One horizontal bar chart of tag totals per machine, side by side.
with sns.axes_style('whitegrid') as style, sns.plotting_context('talk') as context:
    f, ax = plt.subplots(ncols=3, figsize=(15, 5))
    for panel, mach in zip(ax, ['H34', 'I19', 'H14']):
        # Column totals for tags seen more than 6 times, smallest first.
        mach_df = machine_tags(mach, 6).sum().sort_values()
        # Bars are coloured by the first level of the column index via the
        # externally defined `colors` mapping.
        bar_colors = [colors[i] for i in mach_df.index.get_level_values(0)]
        mach_df.plot(kind='barh', color=bar_colors, ax=panel)
        panel.set_title(mach)
    plt.tight_layout()
- H34: issues with motor, brush_unit
- I19: alarms and/or sensors, potentially coolant-related
- H14: wide array of issues, including operator (!?)

%%output size=150 backend='bokeh' filename='machs'
%%opts Text (text_align='right')
%%opts Graph (edge_line_width=4 node_line_color='white', node_size=1)
%%opts EdgePaths [color_index='weight'] (line_width=1, cmap='viridis', color='dodgerblue', alpha=.2)
%%opts Overlay [width=300 legend_position='top_right'] Layout [tabs=True]
%%opts Nodes (size='size' line_color='white')
# Tag networks for the three machines of interest, one hv_net panel each.
# Keyword arguments shared by every hv_net call.
kws = {
    'layout': nx.drawing.spring_layout,
    # 'layout_kws': {'prog': 'neato'},
    'padding': dict(x=(-0.05, 1.05), y=(-0.05, 1.05)),
}
# Each panel uses the tags seen more than 5 times for that machine and is
# named after the machine.
layout = hv.Layout([
    hv_net(machine_tags(mach, 5), name=mach, **kws)
    for mach in ("H34", "I19", "H14")
])
layout
# Rows whose technician name contains 'Lyle Cookson'; rows with no
# technician name are excluded.
islyle = df['Tech Full Name'].str.contains('Lyle Cookson').fillna(False)
# How often each distinct description occurs among those rows.
df.loc[islyle, 'Description'].value_counts()
Base cleaning requested 11 Base needs to be cleaned 8 Clean base 4 Base cleaning 3 Base clean 3 Base required cleaning 2 Cooling unit faults 2 Base cleaning req 2 Clean base -coolant sticky 1 Parts receiver prox cable shorting sensor 1 Clean out Sinico 1 Shipping cart has worn wheels 1 Chips in base obstructin coolant flow to pump 1 Base full 1 Base cleaning Requested 1 Coolant tank needs to be cleaned 1 Base needs to be cleaned -Opers overfilling and spilling on floor 1 Base cleaning -caused fire 1 Clean base to install SS chip catcher 1 Base has hydraulic fluid -Drain/Clean 1 Drain and clean tank -Do not refill 1 Base cleaning requested -Oil lines clogging 1 Repair paper filter system 1 Coolant base needs to be cleaned 1 Name: Description, dtype: int64
# Descriptions of every record whose technician name contains 'Lyle Cookson'.
lyle_rows = df['Tech Full Name'].str.contains('Lyle Cookson').fillna(False)
df.loc[lyle_rows, 'Description']
def person_tags(name, n_reps):
    """Return the tag columns that occur often for one technician.

    Parameters
    ----------
    name : pattern looked up in ``df['Tech Full Name']`` with
        ``str.contains`` (case-sensitive; pandas treats it as a regular
        expression by default).
    n_reps : a column is kept only if its total over the technician's rows
        is strictly greater than this value.

    Returns
    -------
    The rows of the module-level ``tag_df`` for the matching technician(s),
    restricted to the columns that pass the threshold.
    """
    # na=False marks rows with a missing technician name as non-matching and
    # yields a proper boolean mask without a separate fillna step.
    is_match = df['Tech Full Name'].str.contains(name, na=False)
    # Select the rows once and reuse them for both the totals and the result.
    rows = tag_df.loc[is_match]
    frequent = (rows.sum() > n_reps).values
    return rows.loc[:, frequent]
# Tag sub-tables per technician, each with its own frequency threshold.
lyle_tags = person_tags('Lyle Cookson', 5)
steve_tags = person_tags('Steve Andreozzi', 20)
# NOTE(review): this variable is called `andrew_tags` but holds the rows for
# 'Anthony Paolillo'.
andrew_tags = person_tags('Anthony Paolillo', 10)
# Glyle, *_ = tag_df_network(lyle_tags)
# Gsteve, *_ = tag_df_network(steve_tags)
# Gandrew, *_ = tag_df_network(andrew_tags)

# Column totals of Lyle's frequent tags, smallest first.
mach_df = person_tags('Lyle Cookson', 5).sum().sort_values()
# mach_df = mach_df[mach_df>=5]
# Bars are coloured by the first level of the column index via the externally
# defined `colors` mapping.
bar_colors = [colors[i] for i in mach_df.index.get_level_values(0)]

plt.figure(figsize=(5,5))
mach_df.plot(kind='barh', color=bar_colors)
plt.title('Lyle')
Text(0.5,1,'Lyle')
Threshold: only tags that occur more than 5 times (count > 5) are shown.
Say we want to compare with other, more "typical" technicians... $\rightarrow$ small problem...
# with sns.axes_style('whitegrid') as style,\
# sns.plotting_context('talk') as context:
# Side-by-side tag totals for three technicians, all at the same threshold.
f, ax = plt.subplots(ncols=3, figsize=(15, 5))
# NOTE(review): `thres` is assigned but never read; every panel below uses a
# threshold of 5. Confirm whether per-technician thresholds were intended.
thres = [5, 20, 10]
technicians = ['Lyle Cookson', 'Steve Andreozzi', 'Anthony Paolillo']
for panel, mach in zip(ax, technicians):
    mach_df = person_tags(mach, 5).sum().sort_values()
    # mach_df = mach_df[mach_df>=5]
    bar_colors = [colors[i] for i in mach_df.index.get_level_values(0)]
    mach_df.plot(kind='barh', color=bar_colors, ax=panel)
    # Title each panel with the technician's first name only.
    panel.set_title(mach.split(' ')[0])
plt.tight_layout()
%%output size=150 backend='bokeh' filename='techs'
%%opts Graph (edge_line_width=4 node_line_color='white', node_size=1)
%%opts EdgePaths [color_index='weight'] (line_width=1, cmap='viridis', color='dodgerblue', alpha=.2)
%%opts Overlay [width=300 legend_position='top_right'] Layout [tabs=True]
%%opts Nodes (size='size' line_color='white')
# Tag networks for six technicians, one hv_net panel each.
padding = dict(x=(-0.05, 1.05), y=(-0.05, 1.05))
# Keyword arguments shared by every hv_net call.
kws = {
    'layout': nx.drawing.spring_layout,
    # 'layout_kws':{'prog':'neatopusher'}
    'padding': dict(x=(-0.05, 1.05), y=(-0.05, 1.05)),
}
# (name searched for, tag-count threshold, panel title)
tech_specs = [
    ('Lyle Cookson', 1, 'Lyle'),
    ('Steve Andreozzi', 20, 'Steve'),
    ('Anthony Paolillo', 10, 'Anthony'),
    ("Norm Neveux", 1, 'Norm'),
    ("Doug Patrick", 1, 'Doug'),
    ("Colin Doherty", 5, 'Colin'),
]
layout = hv.Layout([
    hv_net(person_tags(full_name, min_count), name=short_name, **kws)
    for full_name, min_count, short_name in tech_specs
])#.cols(1)
# print(layout)
layout
# graph.edgepaths*graph.nodes
HVAC Case study
from mlp import plot
# Calendar view of three HVAC-related tags for selected years.
idx_col = pd.DatetimeIndex(df.REPORTDATE)
samp = ['air_conditioning_unit', 'too_hot', 'too_cold']
# NOTE(review): `cond` is not used in the statements that follow here.
cond = (tag_df.P.alarm==1)

# Take the `samp` columns from every top-level column group, then flatten
# the column index to the inner labels.
hvac_cols = tag_df.loc[:,(slice(None), samp)]
hvac_cols.columns = hvac_cols.columns.droplevel(0)

# Index the rows by report date and keep only the years of interest.
sample_tag = hvac_cols.set_index(idx_col[:])
in_selected_years = sample_tag.index.year.isin([2009, 2010, 2016])
sample_tag = sample_tag[in_selected_years]

plot.calendarplot(sample_tag, how='sum', fig_kws={'figsize':(13,4)});
plt.suptitle('Tag Occurence')
Text(0.5,0.98,'Tag Occurence')
# .rolling(1000).sum()
# 30-day totals of the 'too_hot' / 'too_cold' tags, with a +/- 3 sigma band
# derived from a rolling 30-day sum.
samp = ['too_cold', 'too_hot']
sample_tag = tag_df.loc[:,(slice(None), samp)]
sample_tag.columns = sample_tag.columns.droplevel(0)
# Index by report date and sort: time-based rolling/resampling below needs a
# monotonic datetime index.
sample_tag = sample_tag.set_index(idx_col).sort_index()

# Plot colour per tag; dict order fixes the drawing and legend order.
tag_colors = {
    'too_hot': 'xkcd:orangered',
    'too_cold': 'xkcd:turquoise blue',
}

plt.figure(figsize=(15,3))
# Dotted lines: tag count in each 30-day bin.
for tag, tag_color in tag_colors.items():
    plt.plot(sample_tag[tag].resample('30D').sum(),
             marker='.', ls=':', color=tag_color, label=tag)

# Trailing 30-day sum at every record, summarised per 30-day bin by its mean
# and standard deviation.
roll = sample_tag.rolling('30D').sum()
mean = roll.resample('30D').mean()
err = roll.resample('30D').std()
# Shaded bands: mean +/- 3 standard deviations of the rolling sum.
for tag, tag_color in tag_colors.items():
    plt.fill_between(mean.index,
                     mean[tag] - 3*err[tag],
                     mean[tag] + 3*err[tag],
                     alpha=.4, color=tag_color)

sns.despine()
plt.legend()
plt.title('Monthly tag-occurence estimate')
Text(0.5,1,'Monthly tag-occurence estimate')
%%output size=150 filename='nist_hvac_map'
%%opts Polygons [height=350 width=300, tools=['hover'] colorbar=False ] (cmap='RdBu')
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# , bounds = (-77.222, 39.13, -77.215, 39.14)
# bldg_dict[times[11]].cols(1)
# hv.HoloMap(bldg_dict)
# Top panel: per-time building map with the `text` overlay on it.
building_panel = hv.HoloMap(bldg_dict, 'Time') * text
# Bottom panel: per-time vertical marker over the styled temperature curves.
temperature_panel = hv.HoloMap(vlines, 'Time') * temp_curves.opts(temp_curve_spec)
# Stack the two panels in a single column.
(building_panel + temperature_panel).cols(1)